Paired samples were obtained from each subject. For UC patients,
these were inflamed / UC tissue ("inflamed") and adjacent
normal tissue ("non-inflamed"). Similarly, for healthy
patients two location-matched samples were obtained. Note they do not
expect the non-inflamed samples to be similar to the healthy samples.
From the paper: “We confirmed that expression of an
inflammation-associated gene set increased from healthy to non-inflamed
to inflamed samples”.
Question of interest:
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
data <- read.table("~/Downloads/all.meta2.txt", sep="\t", skip=2)
colnames(data) <- c("NAME", "Cluster", "nGene", "nUMI", "Subject", "Health", "Location", "Sample")
table(data$Cluster)
##
## Best4+ Enterocytes CD4+ Activated Fos-hi CD4+ Activated Fos-lo
## 3286 10406 9421
## CD4+ Memory CD4+ PD1+ CD69- Mast
## 26809 417 143
## CD69+ Mast CD8+ IELs CD8+ IL17+
## 5654 3576 490
## CD8+ LP Cycling B Cycling Monocytes
## 14935 2211 350
## Cycling T Cycling TA DC1
## 557 18204 428
## DC2 Endothelial Enterocyte Progenitors
## 2391 2516 9713
## Enterocytes Enteroendocrine Follicular
## 4517 677 21468
## GC Glia Goblet
## 916 1262 2113
## ILCs Immature Enterocytes 1 Immature Enterocytes 2
## 509 7146 9726
## Immature Goblet Inflammatory Fibroblasts Inflammatory Monocytes
## 9554 2268 1652
## M cells Macrophages Microvascular
## 441 16692 663
## MT-hi Myofibroblasts NKs
## 442 1988 2023
## Pericytes Plasma Post-capillary Venules
## 774 82651 2367
## RSPO3+ Secretory TA Stem
## 362 4882 2403
## TA 1 TA 2 Tregs
## 33671 15774 6473
## Tuft WNT2B+ Fos-hi WNT2B+ Fos-lo 1
## 899 4309 6350
## WNT2B+ Fos-lo 2 WNT5B+ 1 WNT5B+ 2
## 3158 2355 3500
table(data$Subject) # 30 patients
##
## N10 N106 N11 N110 N111 N12 N13 N14 N15 N16 N17 N18 N19
## 16643 7542 6799 10404 25386 2364 4695 4952 10649 5417 5781 8059 7380
## N20 N21 N23 N24 N26 N44 N46 N49 N50 N51 N52 N539 N58
## 9563 6952 13180 13365 11224 14437 10317 8784 10501 23011 33330 7794 25266
## N661 N7 N8 N9
## 43187 4446 2224 11840
#table(data$Subject, data$Sample)
## For UC patients: inflamed (UC) and adjacent non-inflamed tissue.
table(data$Subject, data$Health)
##
## Healthy Inflamed Non-inflamed
## N10 16643 0 0
## N106 0 4848 2694
## N11 6799 0 0
## N110 0 3834 6570
## N111 0 19648 5738
## N12 0 1355 1009
## N13 4695 0 0
## N14 0 2276 2676
## N15 10649 0 0
## N16 5417 0 0
## N17 5781 0 0
## N18 8059 0 0
## N19 0 2423 4957
## N20 9563 0 0
## N21 6952 0 0
## N23 0 6341 6839
## N24 0 5511 7854
## N26 0 4360 6864
## N44 0 6392 8045
## N46 10317 0 0
## N49 0 4015 4769
## N50 0 4917 5584
## N51 23011 0 0
## N52 0 12901 20429
## N539 0 3345 4449
## N58 0 17599 7667
## N661 0 18589 24598
## N7 0 1833 2613
## N8 2224 0 0
## N9 0 4932 6908
dataEpi <- data[data$Location == "Epi",]
dataEpi <- dataEpi %>%
group_by(Sample, Cluster, Subject, Health) %>%
summarize(nCells = n())
## `summarise()` has grouped output by 'Sample', 'Cluster', 'Subject'. You can
## override using the `.groups` argument.
library(tidyverse)
dataEpiWide <- pivot_wider(dataEpi,
id_cols = c("Sample", "Subject", "Health"),
names_from = "Cluster",
values_from = "nCells")
dataEpiWide[is.na(dataEpiWide)] <- 0
dataEpiCountMatrix <- as.matrix(t(dataEpiWide[,-c(1:3)]))
colnames(dataEpiCountMatrix) <- dataEpiWide$Sample
dataEpiMeta <- dataEpiWide[,1:3]
keep <- rowSums(dataEpiCountMatrix > 4) >= 6
dataEpiCountMatrix <- dataEpiCountMatrix[keep,]
dataEpi <- dataEpi[dataEpi$Cluster %in% rownames(dataEpiCountMatrix),]
dataEpi2 <- dataEpi %>%
ungroup() %>%
group_by(Sample) %>%
mutate(nCellTypes = n(),
geoMean = prod(nCells+1)^(1/nCellTypes),
CLR = log((nCells+1) / geoMean ))
dataEpi2$Health <- factor(dataEpi2$Health, levels=c("Healthy", "Non-inflamed", "Inflamed"))
ggplot(dataEpi2, aes(x=CLR, y=Cluster, fill=Health)) +
geom_boxplot(outlier.size = 0) +
#geom_point(position = position_dodge(width = .75), size=1/5) +
theme_classic()
library(limma)
## Warning: package 'limma' was built under R version 4.2.2
library(voomCLR)
design <- model.matrix(~ Health, data=dataEpiMeta)
v <- voomCLR(counts = dataEpiCountMatrix,
design = design,
block = factor(dataEpiMeta$Subject),
plot = TRUE)
cf <- duplicateCorrelation(v, design, block=factor(dataEpiMeta$Subject))
v <- voomCLR(counts = dataEpiCountMatrix,
design = design,
block = factor(dataEpiMeta$Subject),
plot = TRUE,
correlation = cf$consensus.correlation)
cf <- duplicateCorrelation(v, design, block=factor(dataEpiMeta$Subject))
fit <- lmFit(v,
design,
block = factor(dataEpiMeta$Subject),
correlation = cf$consensus.correlation)
L <- matrix(0, nrow=ncol(fit$coefficients), ncol=3,
dimnames = list(colnames(fit$coefficients), c("NIvsH", "IvsH", "IvsNI")))
L["HealthNon-inflamed",c("NIvsH")] <- 1
L["HealthInflamed",c("IvsH")] <- 1
L[c("HealthInflamed", "HealthNon-inflamed"),3] <- c(1, -1)
conFit <- contrasts.fit(fit, contrasts = L)
conFit <- eBayes(conFit)
plotBeta(conFit)
resList <- list()
for(cc in 1:3){
ttbc <- topTableBC(conFit,
coef=cc,
number=Inf,
sort.by="none")#,
#bootstrap="nonparametric")
resList[[cc]] <- ttbc
}
## linda
library(MicrobiomeStat)
library(phyloseq)
## Warning: package 'phyloseq' was built under R version 4.2.2
lindaRes <- LinDA::linda(otu.tab = dataEpiCountMatrix, # rows features, cols samples
meta = as.data.frame(dataEpiMeta),
formula = '~ Health + (1|Subject)',
type = 'count',
adaptive=TRUE,
imputation = FALSE)
## Imputation approach is used.
# inflamed vs healthy
lindaIvsH <- lindaRes$output$HealthInflamed
# non-inflamed vs healthy
lindaNIvsH <- lindaRes$output$`HealthNon-inflamed`
## inflamed vs non-inflamed: code from developers on GitHub
alp1 <- lindaRes$output$HealthInflamed$log2FoldChange
se1 <- lindaRes$output$HealthInflamed$lfcSE
df1 <- lindaRes$output$HealthInflamed$df
alp3 <- lindaRes$output$`HealthNon-inflamed`$log2FoldChange
se3 <- lindaRes$output$`HealthNon-inflamed`$lfcSE
df3 <- lindaRes$output$`HealthNon-inflamed`$df
cov13 <- lindaRes$covariance$HealthInflamed$`HealthNon-inflamed`
stat_linDA <- (alp1-alp3) / (sqrt(se1^2+se3^2-2*cov13))
l2fc_linDA <- alp1-alp3
names(stat_linDA) <- names(l2fc_linDA) <- rownames(lindaRes$output$SLE_statusSLE)
pvalue_linDA <- 2 * pt(-abs(stat_linDA), (df1+df3)/2)
padj_linDA <- p.adjust(pvalue_linDA, method = 'BH')
lindaIvsNI <- data.frame(log2FoldChange=l2fc_linDA,
stat=stat_linDA,
pvalue=pvalue_linDA,
padj=padj_linDA,
row.names=rownames(lindaRes$output$HealthInflamed))
library(glmmTMB)
library(multcomp)
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
##
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
##
## geyser
resDfNBIvsH <- resDfNBNIvsH <- resDfNBIvsNI <- data.frame(population=rownames(dataEpiCountMatrix),
diff=rep(NA,nrow(dataEpiCountMatrix)),
se=rep(NA,nrow(dataEpiCountMatrix)),
pval=rep(NA,nrow(dataEpiCountMatrix)))
for(pp in 1:nrow(dataEpiCountMatrix)){
curY <- dataEpiCountMatrix[pp,]
curDf <- cbind(dataEpiMeta, y=curY)
m_p <- glmmTMB(y ~ Health + (1|Subject),
data=curDf,
family=nbinom2(link="log"),
offset = log(colSums(dataEpiCountMatrix)))
# inflamed vs healthy
resDfNBIvsH[pp,2:4] <- c(summary(m_p)$coefficients$cond[2,1],
summary(m_p)$coefficients$cond[2,2],
summary(m_p)$coefficients$cond[2,4])
# non-inflamed vs healthy
resDfNBNIvsH[pp,2:4] <- c(summary(m_p)$coefficients$cond[3,1],
summary(m_p)$coefficients$cond[3,2],
summary(m_p)$coefficients$cond[3,4])
## inflamed vs non-inflamed: contrast
L <- matrix(0, nrow=nrow(summary(m_p)$coef$cond), ncol=1)
rownames(L) <- rownames(summary(m_p)$coef$cond)
L[c("HealthInflamed", "HealthNon-inflamed"),1] <- c(1,-1)
m_p_contrast <- glht(m_p,
linfct=t(L),
coef. = function(x) fixef(x)[["cond"]],
vcov. = function(x) vcov(x)[["cond"]],
df=NULL)
resDfNBIvsNI[pp,2:4] <- c(summary(m_p_contrast)$test$coefficients,
summary(m_p_contrast)$test$sigma,
summary(m_p_contrast)$test$pvalues)
}
resDfNBIvsH$padj <- p.adjust(resDfNBIvsH$pval, "fdr")
resDfNBNIvsH$padj <- p.adjust(resDfNBNIvsH$pval, "fdr")
resDfNBIvsNI$padj <- p.adjust(resDfNBIvsNI$pval, "fdr")
nPopulations <- nrow(dataEpiCountMatrix)
resDfAll <- data.frame(population = unlist(lapply(resList, rownames)),
contrast = rep(c("NIvsH", "IvsH", "IvsNI"), each=nPopulations),
log2FC = unlist(lapply(resList, function(x) x$logFC)),
padj = unlist(lapply(resList, function(x) x$adj.P.Val))
)
resDfAll$DA <- resDfAll$padj <= 0.05
pHeatVoomCLR <- ggplot(resDfAll,
aes(x=contrast, y=population, color=log2FC, size=DA)) +
geom_point()+
theme_light()+
scale_color_gradient2(low= "blue", mid="gray", high= "red") +
theme(strip.text.x = element_text(size=14, angle=90, hjust = 1, vjust = 1)) +
xlab("Contrast") +
ylab("Cell type")
pHeatVoomCLR
## Warning: Using size for a discrete variable is not advised.
lindaIvsH$contrast <- "IvsH"
lindaIvsH$population <- rownames(lindaIvsH)
lindaIvsNI$contrast <- "IvsNI"
lindaIvsNI$population <- rownames(lindaIvsNI)
lindaNIvsH$contrast <- "NIvsH"
lindaNIvsH$population <- rownames(lindaNIvsH)
selectCols <- c("population","log2FoldChange", "stat", "pvalue", "padj", "contrast")
resDfAllLinDA <- rbind(lindaIvsH[,selectCols],
lindaIvsNI[,selectCols],
lindaNIvsH[,selectCols])
resDfAllLinDA$DA <- resDfAllLinDA$padj <= 0.05
pHeatLinDA <- ggplot(resDfAllLinDA,
aes(x=contrast, y=population, color=log2FoldChange, size=DA)) +
geom_point()+
theme_light()+
scale_color_gradient2(low= "blue", mid="gray", high= "red") +
theme(strip.text.x = element_text(size=14, angle=90, hjust = 1, vjust = 1)) +
xlab("Contrast") +
ylab("Cell type")
pHeatLinDA
## Warning: Using size for a discrete variable is not advised.
resDfNBIvsH$contrast <- "IvsH"
resDfNBIvsNI$contrast <- "IvsNI"
resDfNBNIvsH$contrast <- "NIvsH"
resDfAllNB <- rbind(resDfNBIvsH,
resDfNBIvsNI,
resDfNBNIvsH)
resDfAllNB$DA <- resDfAllNB$padj <= 0.05
pHeatNB <- ggplot(resDfAllNB,
aes(x=contrast, y=population, color=diff, size=DA)) +
geom_point()+
theme_light()+
scale_color_gradient2(low= "blue", mid="gray", high= "red") +
theme(strip.text.x = element_text(size=14, angle=90, hjust = 1, vjust = 1)) +
xlab("Contrast") +
ylab("Cell type")
pHeatNB
## Warning: Using size for a discrete variable is not advised.
dataLP <- data[data$Location == "LP",]
dataLP <- dataLP %>%
group_by(Sample, Cluster, Subject, Health) %>%
summarize(nCells = n())
## `summarise()` has grouped output by 'Sample', 'Cluster', 'Subject'. You can
## override using the `.groups` argument.
library(tidyverse)
dataLPWide <- pivot_wider(dataLP,
id_cols = c("Sample", "Subject", "Health"),
names_from = "Cluster",
values_from = "nCells")
dataLPWide[is.na(dataLPWide)] <- 0
dataLPCountMatrix <- as.matrix(t(dataLPWide[,-c(1:3)]))
colnames(dataLPCountMatrix) <- dataLPWide$Sample
dataLPMeta <- dataLPWide[,1:3]
keep <- rowSums(dataLPCountMatrix > 4) >= 6
dataLPCountMatrix <- dataLPCountMatrix[keep,]
dataLP <- dataLP[dataLP$Cluster %in% rownames(dataLPCountMatrix),]
dataLP2 <- dataLP %>%
ungroup() %>%
group_by(Sample) %>%
mutate(nCellTypes = n(),
geoMean = prod(nCells+1)^(1/nCellTypes),
CLR = log((nCells+1) / geoMean ))
dataLP2$Health <- factor(dataLP2$Health, levels=c("Healthy", "Non-inflamed", "Inflamed"))
ggplot(dataLP2, aes(x=CLR, y=Cluster, fill=Health)) +
geom_boxplot(outlier.size = 0) +
#geom_point(position = position_dodge(width = .75), size=1/5) +
theme_classic()
library(limma)
library(voomCLR)
design <- model.matrix(~ Health, data=dataLPMeta)
v <- voomCLR(counts = dataLPCountMatrix,
design = design,
block = factor(dataLPMeta$Subject),
plot = TRUE)
cf <- duplicateCorrelation(v, design, block=factor(dataLPMeta$Subject))
v <- voomCLR(counts = dataLPCountMatrix,
design = design,
block = factor(dataLPMeta$Subject),
plot = TRUE,
correlation = cf$consensus.correlation)
cf <- duplicateCorrelation(v, design, block=factor(dataLPMeta$Subject))
fit <- lmFit(v,
design,
block = factor(dataLPMeta$Subject),
correlation = cf$consensus.correlation)
L <- matrix(0, nrow=ncol(fit$coefficients), ncol=3,
dimnames = list(colnames(fit$coefficients), c("NIvsH", "IvsH", "IvsNI")))
L["HealthNon-inflamed",c("NIvsH")] <- 1
L["HealthInflamed",c("IvsH")] <- 1
L[c("HealthInflamed", "HealthNon-inflamed"),3] <- c(1, -1)
conFit <- contrasts.fit(fit, contrasts = L)
conFit <- eBayes(conFit)
plotBeta(conFit)
resList <- list()
for(cc in 1:3){
ttbc <- topTableBC(conFit,
coef=cc,
number=Inf,
sort.by="none")#,
#bootstrap="nonparametric")
resList[[cc]] <- ttbc
}
## linda
library(MicrobiomeStat)
library(phyloseq)
lindaRes <- LinDA::linda(otu.tab = dataLPCountMatrix, # rows features, cols samples
meta = as.data.frame(dataLPMeta),
formula = '~ Health + (1|Subject)',
type = 'count',
adaptive=TRUE,
imputation = FALSE)
## Imputation approach is used.
# inflamed vs healthy
lindaIvsH <- lindaRes$output$HealthInflamed
# non-inflamed vs healthy
lindaNIvsH <- lindaRes$output$`HealthNon-inflamed`
## inflamed vs non-inflamed: code from developers on GitHub
alp1 <- lindaRes$output$HealthInflamed$log2FoldChange
se1 <- lindaRes$output$HealthInflamed$lfcSE
df1 <- lindaRes$output$HealthInflamed$df
alp3 <- lindaRes$output$`HealthNon-inflamed`$log2FoldChange
se3 <- lindaRes$output$`HealthNon-inflamed`$lfcSE
df3 <- lindaRes$output$`HealthNon-inflamed`$df
cov13 <- lindaRes$covariance$HealthInflamed$`HealthNon-inflamed`
stat_linDA <- (alp1-alp3) / (sqrt(se1^2+se3^2-2*cov13))
l2fc_linDA <- alp1-alp3
names(stat_linDA) <- names(l2fc_linDA) <- rownames(lindaRes$output$SLE_statusSLE)
pvalue_linDA <- 2 * pt(-abs(stat_linDA), (df1+df3)/2)
padj_linDA <- p.adjust(pvalue_linDA, method = 'BH')
lindaIvsNI <- data.frame(log2FoldChange=l2fc_linDA,
stat=stat_linDA,
pvalue=pvalue_linDA,
padj=padj_linDA,
row.names=rownames(lindaRes$output$HealthInflamed))
library(glmmTMB)
library(multcomp)
resDfNBIvsH <- resDfNBNIvsH <- resDfNBIvsNI <- data.frame(population=rownames(dataLPCountMatrix),
diff=rep(NA,nrow(dataLPCountMatrix)),
se=rep(NA,nrow(dataLPCountMatrix)),
pval=rep(NA,nrow(dataLPCountMatrix)))
for(pp in 1:nrow(dataLPCountMatrix)){
curY <- dataLPCountMatrix[pp,]
curDf <- cbind(dataLPMeta, y=curY)
m_p <- glmmTMB(y ~ Health + (1|Subject),
data=curDf,
family=nbinom2(link="log"),
offset = log(colSums(dataLPCountMatrix)))
# inflamed vs healthy
resDfNBIvsH[pp,2:4] <- c(summary(m_p)$coefficients$cond[2,1],
summary(m_p)$coefficients$cond[2,2],
summary(m_p)$coefficients$cond[2,4])
# non-inflamed vs healthy
resDfNBNIvsH[pp,2:4] <- c(summary(m_p)$coefficients$cond[3,1],
summary(m_p)$coefficients$cond[3,2],
summary(m_p)$coefficients$cond[3,4])
## inflamed vs non-inflamed: contrast
L <- matrix(0, nrow=nrow(summary(m_p)$coef$cond), ncol=1)
rownames(L) <- rownames(summary(m_p)$coef$cond)
L[c("HealthInflamed", "HealthNon-inflamed"),1] <- c(1,-1)
m_p_contrast <- glht(m_p,
linfct=t(L),
coef. = function(x) fixef(x)[["cond"]],
vcov. = function(x) vcov(x)[["cond"]],
df=NULL)
resDfNBIvsNI[pp,2:4] <- c(summary(m_p_contrast)$test$coefficients,
summary(m_p_contrast)$test$sigma,
summary(m_p_contrast)$test$pvalues)
}
resDfNBIvsH$padj <- p.adjust(resDfNBIvsH$pval, "fdr")
resDfNBNIvsH$padj <- p.adjust(resDfNBNIvsH$pval, "fdr")
resDfNBIvsNI$padj <- p.adjust(resDfNBIvsNI$pval, "fdr")
nPopulations <- nrow(dataLPCountMatrix)
resDfAll <- data.frame(population = unlist(lapply(resList, rownames)),
contrast = rep(c("NIvsH", "IvsH", "IvsNI"), each=nPopulations),
log2FC = unlist(lapply(resList, function(x) x$logFC)),
padj = unlist(lapply(resList, function(x) x$adj.P.Val))
)
resDfAll$DA <- resDfAll$padj <= 0.05
pHeatVoomCLR <- ggplot(resDfAll,
aes(x=contrast, y=population, color=log2FC, size=DA)) +
geom_point()+
theme_light()+
scale_color_gradient2(low= "blue", mid="gray", high= "red") +
theme(strip.text.x = element_text(size=14, angle=90, hjust = 1, vjust = 1)) +
xlab("Contrast") +
ylab("Cell type")
pHeatVoomCLR
## Warning: Using size for a discrete variable is not advised.
lindaIvsH$contrast <- "IvsH"
lindaIvsH$population <- rownames(lindaIvsH)
lindaIvsNI$contrast <- "IvsNI"
lindaIvsNI$population <- rownames(lindaIvsNI)
lindaNIvsH$contrast <- "NIvsH"
lindaNIvsH$population <- rownames(lindaNIvsH)
selectCols <- c("population","log2FoldChange", "stat", "pvalue", "padj", "contrast")
resDfAllLinDA <- rbind(lindaIvsH[,selectCols],
lindaIvsNI[,selectCols],
lindaNIvsH[,selectCols])
resDfAllLinDA$DA <- resDfAllLinDA$padj <= 0.05
pHeatLinDA <- ggplot(resDfAllLinDA,
aes(x=contrast, y=population, color=log2FoldChange, size=DA)) +
geom_point()+
theme_light()+
scale_color_gradient2(low= "blue", mid="gray", high= "red") +
theme(strip.text.x = element_text(size=14, angle=90, hjust = 1, vjust = 1)) +
xlab("Contrast") +
ylab("Cell type")
pHeatLinDA
## Warning: Using size for a discrete variable is not advised.
resDfNBIvsH$contrast <- "IvsH"
resDfNBIvsNI$contrast <- "IvsNI"
resDfNBNIvsH$contrast <- "NIvsH"
resDfAllNB <- rbind(resDfNBIvsH,
resDfNBIvsNI,
resDfNBNIvsH)
resDfAllNB$DA <- resDfAllNB$padj <= 0.05
pHeatNB <- ggplot(resDfAllNB,
aes(x=contrast, y=population, color=diff, size=DA)) +
geom_point()+
theme_light()+
scale_color_gradient2(low= "blue", mid="gray", high= "red") +
theme(strip.text.x = element_text(size=14, angle=90, hjust = 1, vjust = 1)) +
xlab("Contrast") +
ylab("Cell type")
pHeatNB
## Warning: Using size for a discrete variable is not advised.